# Numerical computation (numpy), data manipulation (pandas), plotting (matplotlib).
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Jupyter magic: render matplotlib figures inline (notebook export; not valid in a plain .py).
%matplotlib inline
# Plotly for building interactive graphs.
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot
# Load the TCS (NSE) daily stock-price CSV and preview the first rows.
tcs=pd.read_csv("TCSNS.csv")
tcs.head()
| Date | Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|---|
| 0 | 2018-04-12 | 1505.000000 | 1575.000000 | 1503.974976 | 1569.625000 | 1408.547119 | 6114360 |
| 1 | 2018-04-13 | 1575.000000 | 1620.400024 | 1566.050049 | 1576.650024 | 1414.850952 | 8179118 |
| 2 | 2018-04-16 | 1558.000000 | 1599.974976 | 1555.375000 | 1593.824951 | 1430.263306 | 6270274 |
| 3 | 2018-04-17 | 1593.824951 | 1597.474976 | 1573.599976 | 1583.300049 | 1420.818848 | 3301724 |
| 4 | 2018-04-18 | 1583.000000 | 1596.724976 | 1572.925049 | 1579.550049 | 1417.453369 | 2433066 |
# Column dtypes and null counts; note Date is still object (string) until converted below.
tcs.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1235 entries, 0 to 1234 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 1235 non-null object 1 Open 1235 non-null float64 2 High 1235 non-null float64 3 Low 1235 non-null float64 4 Close 1235 non-null float64 5 Adj Close 1235 non-null float64 6 Volume 1235 non-null int64 dtypes: float64(5), int64(1), object(1) memory usage: 67.7+ KB
# Parse the Date column from string to datetime64 so date arithmetic works.
tcs['Date'] = pd.to_datetime(tcs['Date'])

# Report the calendar span covered by the dataset.
first_day, last_day = tcs.Date.min(), tcs.Date.max()
print(f"Dataframe contains stock prices between {first_day} and {last_day}")
print(f"Total Days = {(last_day - first_day).days} Days")
Dataframe contains stock prices between 2018-04-12 00:00:00 and 2023-04-11 00:00:00 Total Days = 1825 Days
# Summary statistics (count / mean / std / quartiles) for all numeric columns.
tcs.describe()
| Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|
| count | 1235.000000 | 1235.000000 | 1235.000000 | 1235.000000 | 1235.000000 | 1.235000e+03 |
| mean | 2686.566638 | 2713.224065 | 2657.218266 | 2684.913217 | 2559.095946 | 3.000685e+06 |
| std | 678.613932 | 681.461456 | 674.466401 | 678.072787 | 705.299841 | 1.906813e+06 |
| min | 1505.000000 | 1575.000000 | 1503.974976 | 1569.625000 | 1408.547119 | 1.445300e+05 |
| 25% | 2064.050049 | 2084.974976 | 2040.200012 | 2059.025024 | 1899.312683 | 1.856920e+06 |
| 50% | 2655.000000 | 2676.000000 | 2621.600098 | 2649.600098 | 2533.968750 | 2.540719e+06 |
| 75% | 3293.425049 | 3326.000000 | 3262.300049 | 3290.900024 | 3205.845337 | 3.509598e+06 |
| max | 4033.949951 | 4043.000000 | 3980.000000 | 4019.149902 | 3885.789795 | 2.290380e+07 |
# Box plot of the price columns to eyeball spread and outliers.
tcs[["Open",'High','Low','Close','Adj Close']].plot(kind='box')
<AxesSubplot:>
# Layout for the Plotly chart of closing prices over time.
layout=go.Layout(title='Stock Prices of TCS',xaxis=dict(title="Date"),yaxis=dict(title='Price'))
tcs_data=[{'x':tcs['Date'],'y':tcs['Close']}]
# BUG FIX: the figure was previously stored in a variable named `plot`, which
# shadowed the `plot` function imported from plotly.offline above; use a
# distinct name so the imported function stays callable.
fig=go.Figure(data=tcs_data,layout=layout)
fig
# Building the regression model.
from sklearn.model_selection import train_test_split
# For preprocessing (feature scaling).
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
# For model evaluation and checking the accuracy.
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score
C:\Users\tanma\anaconda3\lib\site-packages\scipy\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.2
# Split the data into a training set and a testing set.
# x holds the independent feature (the row index, i.e. the day number) and
# y the dependent variable (the closing price).
x=np.array(tcs.index).reshape(-1,1)
y=tcs['Close']
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=101)
# NOTE(review): a StandardScaler was previously fitted here but its transform
# was never applied to x_train/x_test, so it had no effect on the model;
# the dead statement has been removed.
from sklearn.linear_model import LinearRegression
# Ordinary least squares: model Close price as a linear function of the day index.
lm=LinearRegression()
lm.fit(x_train,y_train)
LinearRegression()
# Plot actual vs. predicted values for the training dataset.
trace0= go.Scatter(x=x_train.T[0],y=y_train,mode="markers",name="Actual")
# .T on the 1-D array returned by predict() is a no-op.
trace1 = go.Scatter(x=x_train.T[0],y=lm.predict(x_train).T,mode='lines',name='predicted')
tcs_data=[trace0,trace1]
# Reuse the earlier layout, relabelling the x-axis (x is day index, not date).
layout.xaxis.title.text='Day'
plot2=go.Figure(data=tcs_data, layout=layout)
plot2
# Score table: R^2 and MSE on the train and test splits.
# Predictions are computed once per split instead of once per metric, and the
# "Metrix" typo in the printed header is corrected to "Metric".
y_train_pred=lm.predict(x_train)
y_test_pred=lm.predict(x_test)
score=f'''
{'Metric'.ljust(10)}{'train'.center(20)}{'test'.center(20)}
{'r2_score'.ljust(10)}{r2_score(y_train,y_train_pred)}\t{r2_score(y_test,y_test_pred)}
{'MSE'.ljust(10)}{mse(y_train,y_train_pred)}\t{mse(y_test,y_test_pred)}
'''
print(score)
Metrix train test r2_score 0.7700219820352993 0.7871548799849638 MSE 105159.11866541322 97951.27305330055
# Using an LSTM deep-learning model for prediction.
# Sequential: a linear stack of layers, to model the sequence of the data.
# Dropout: regularization layer controlling the dropout rate.
from keras.models import Sequential
from keras.layers import Dense,LSTM,Dropout
# Load the training split of the TCS price history.
data=pd.read_csv('TCS Training.csv')
data.head()
| Date | Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|---|
| 0 | 12-04-2018 | 1505.000000 | 1575.000000 | 1503.974976 | 1569.625000 | 1408.547119 | 6114360 |
| 1 | 13-04-2018 | 1575.000000 | 1620.400024 | 1566.050049 | 1576.650024 | 1414.850952 | 8179118 |
| 2 | 16-04-2018 | 1558.000000 | 1599.974976 | 1555.375000 | 1593.824951 | 1430.263306 | 6270274 |
| 3 | 17-04-2018 | 1593.824951 | 1597.474976 | 1573.599976 | 1583.300049 | 1420.818848 | 3301724 |
| 4 | 18-04-2018 | 1583.000000 | 1596.724976 | 1572.925049 | 1579.550049 | 1417.453369 | 2433066 |
# 920 rows, no nulls; Date stays an object (string) — it is not used by the LSTM pipeline below.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 920 entries, 0 to 919 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 920 non-null object 1 Open 920 non-null float64 2 High 920 non-null float64 3 Low 920 non-null float64 4 Close 920 non-null float64 5 Adj Close 920 non-null float64 6 Volume 920 non-null int64 dtypes: float64(5), int64(1), object(1) memory usage: 50.4+ KB
# Coerce Close to numeric (non-numeric values become NaN) and drop such rows.
data['Close']=pd.to_numeric(data.Close,errors='coerce')
data=data.dropna()
# Select the Close column by name rather than positional index 4: clearer and
# robust to column reordering. Shape stays (n_rows, 1) for the scaler below.
traindata=data[['Close']].values
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 920 entries, 0 to 919 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 920 non-null object 1 Open 920 non-null float64 2 High 920 non-null float64 3 Low 920 non-null float64 4 Close 920 non-null float64 5 Adj Close 920 non-null float64 6 Volume 920 non-null int64 dtypes: float64(5), int64(1), object(1) memory usage: 50.4+ KB
# Scale closing prices into [0, 1]; the same fitted scaler is reused later to
# transform the test window and to invert the model's predictions.
sc=MinMaxScaler(feature_range=(0,1))
traindata=sc.fit_transform(traindata)
traindata.shape
(920, 1)
# Build supervised samples with a sliding window: each input is the previous
# `timestep` scaled closes and the target is the close that follows them.
# The loop bound was hard-coded to 920 (the row count); use len(traindata)
# so the cell keeps working if the training file grows or shrinks.
timestep=60
x_train=[]
y_train=[]
for i in range(timestep,len(traindata)):
    x_train.append(traindata[i-timestep:i,0])
    y_train.append(traindata[i,0])
x_train,y_train=np.array(x_train),np.array(y_train)
# Reshape to (samples, timesteps, features=1) as expected by Keras LSTM layers.
x_train=np.reshape(x_train,(x_train.shape[0],x_train.shape[1],1))
x_train.shape
(860, 60, 1)
# Stacked LSTM: four recurrent layers of 100 units, each followed by 20%
# dropout, ending in a single dense output (next-day scaled close).
model=Sequential()
lstm_layer_count=4
for layer_idx in range(lstm_layer_count):
    # All but the last LSTM must emit the full sequence for the next LSTM.
    is_last=layer_idx==lstm_layer_count-1
    if layer_idx==0:
        model.add(LSTM(units=100,return_sequences=not is_last,
                       input_shape=(x_train.shape[1],1)))
    else:
        model.add(LSTM(units=100,return_sequences=not is_last))
    model.add(Dropout(0.2))
model.add(Dense(units=1))
# Adam optimizer with MSE loss — standard for this regression setup.
model.compile(optimizer='adam',loss='mean_squared_error')
hist=model.fit(x_train,y_train,epochs=20,batch_size=32,verbose=2)
Epoch 1/20 27/27 - 12s - loss: 0.0333 - 12s/epoch - 461ms/step Epoch 2/20 27/27 - 5s - loss: 0.0056 - 5s/epoch - 174ms/step Epoch 3/20 27/27 - 5s - loss: 0.0046 - 5s/epoch - 170ms/step Epoch 4/20 27/27 - 5s - loss: 0.0046 - 5s/epoch - 172ms/step Epoch 5/20 27/27 - 5s - loss: 0.0036 - 5s/epoch - 167ms/step Epoch 6/20 27/27 - 5s - loss: 0.0038 - 5s/epoch - 167ms/step Epoch 7/20 27/27 - 6s - loss: 0.0032 - 6s/epoch - 232ms/step Epoch 8/20 27/27 - 7s - loss: 0.0035 - 7s/epoch - 246ms/step Epoch 9/20 27/27 - 6s - loss: 0.0034 - 6s/epoch - 239ms/step Epoch 10/20 27/27 - 6s - loss: 0.0028 - 6s/epoch - 235ms/step Epoch 11/20 27/27 - 6s - loss: 0.0027 - 6s/epoch - 238ms/step Epoch 12/20 27/27 - 7s - loss: 0.0033 - 7s/epoch - 241ms/step Epoch 13/20 27/27 - 6s - loss: 0.0038 - 6s/epoch - 238ms/step Epoch 14/20 27/27 - 6s - loss: 0.0026 - 6s/epoch - 228ms/step Epoch 15/20 27/27 - 6s - loss: 0.0023 - 6s/epoch - 235ms/step Epoch 16/20 27/27 - 6s - loss: 0.0025 - 6s/epoch - 232ms/step Epoch 17/20 27/27 - 7s - loss: 0.0026 - 7s/epoch - 258ms/step Epoch 18/20 27/27 - 6s - loss: 0.0025 - 6s/epoch - 237ms/step Epoch 19/20 27/27 - 6s - loss: 0.0022 - 6s/epoch - 229ms/step Epoch 20/20 27/27 - 9s - loss: 0.0028 - 9s/epoch - 333ms/step
# Plot the training-loss curve recorded by model.fit.
loss_history=hist.history['loss']
plt.plot(loss_history)
plt.title('Training Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train'], loc='upper right')
plt.show()
# Prepare the held-out test file the same way as the training data.
testdata=pd.read_csv("TCS Test.csv")
testdata['Close']=pd.to_numeric(testdata.Close,errors="coerce")
testdata=testdata.dropna()
# Select Close by name instead of positional index 4 (clearer, reorder-safe).
testdata=testdata[['Close']]
timestep=60
# Ground-truth prices: everything after the first `timestep` rows, since each
# prediction consumes the preceding 60 closes as its input window.
y_test=testdata.iloc[timestep:,0:].values
# Input array for the model, scaled with the scaler fitted on the training data.
inputclosing=testdata.iloc[:,0:].values
inputclosing_scaled=sc.transform(inputclosing)
inputclosing_scaled.shape
# Sliding 60-step windows over the scaled test series.
x_test=[inputclosing_scaled[i-timestep:i,0] for i in range(timestep,len(testdata))]
x_test=np.array(x_test)
# (samples, timesteps, features=1) as required by the LSTM input layer.
x_test=np.reshape(x_test,(x_test.shape[0],x_test.shape[1],1))
x_test.shape
(255, 60, 1)
# Predict the scaled closing price for every test window.
y_pred=model.predict(x_test)
y_pred
8/8 [==============================] - 0s 51ms/step
array([[0.9699101 ],
[0.973404 ],
[0.9759752 ],
[0.977911 ],
[0.9797219 ],
[0.9813508 ],
[0.98208266],
[0.98133975],
[0.9789395 ],
[0.975015 ],
[0.969714 ],
[0.9624284 ],
[0.9524859 ],
[0.9403732 ],
[0.9275488 ],
[0.9154215 ],
[0.90462357],
[0.89546245],
[0.8881281 ],
[0.8828883 ],
[0.87958294],
[0.877889 ],
[0.8770355 ],
[0.8765748 ],
[0.87576383],
[0.87418604],
[0.87172127],
[0.8683422 ],
[0.86418283],
[0.85955626],
[0.85462886],
[0.85007197],
[0.8464737 ],
[0.8429838 ],
[0.83893573],
[0.8341906 ],
[0.82871896],
[0.82194096],
[0.81389815],
[0.80529624],
[0.79764634],
[0.79217285],
[0.7894852 ],
[0.79006684],
[0.79401934],
[0.80092007],
[0.8094769 ],
[0.8185993 ],
[0.82755315],
[0.8353694 ],
[0.8403552 ],
[0.8413437 ],
[0.83794904],
[0.830123 ],
[0.8181102 ],
[0.8028081 ],
[0.7861381 ],
[0.7700738 ],
[0.75672954],
[0.74742943],
[0.74280244],
[0.7427606 ],
[0.7464735 ],
[0.75263816],
[0.7602676 ],
[0.767986 ],
[0.7745512 ],
[0.77947086],
[0.78284985],
[0.78485763],
[0.7847197 ],
[0.78166276],
[0.77517694],
[0.7651311 ],
[0.7519513 ],
[0.7369655 ],
[0.7217589 ],
[0.7083135 ],
[0.69820994],
[0.6922533 ],
[0.69047886],
[0.6920504 ],
[0.69631034],
[0.70305675],
[0.712215 ],
[0.72345024],
[0.73605317],
[0.7494517 ],
[0.7631193 ],
[0.7765377 ],
[0.78921896],
[0.80057824],
[0.81055534],
[0.81902695],
[0.82586414],
[0.8310084 ],
[0.83433837],
[0.83586353],
[0.8355451 ],
[0.8329919 ],
[0.82789356],
[0.8200721 ],
[0.80983394],
[0.7972906 ],
[0.7833617 ],
[0.7686922 ],
[0.75394124],
[0.7398356 ],
[0.7269997 ],
[0.71607345],
[0.7076485 ],
[0.70238906],
[0.7007466 ],
[0.7026252 ],
[0.7068117 ],
[0.71184945],
[0.7159389 ],
[0.71794033],
[0.7175327 ],
[0.7146862 ],
[0.7097985 ],
[0.70334196],
[0.69597405],
[0.6885288 ],
[0.6818617 ],
[0.6763268 ],
[0.6720869 ],
[0.6690182 ],
[0.667569 ],
[0.6681475 ],
[0.67059714],
[0.67483693],
[0.68026084],
[0.6863399 ],
[0.69259536],
[0.69857925],
[0.7039873 ],
[0.70884603],
[0.7130484 ],
[0.7167121 ],
[0.7198138 ],
[0.72243875],
[0.72467583],
[0.72651905],
[0.72796565],
[0.72920257],
[0.7308308 ],
[0.733179 ],
[0.7360474 ],
[0.7391314 ],
[0.74221903],
[0.74501115],
[0.74720675],
[0.74932224],
[0.75202984],
[0.75571525],
[0.7605637 ],
[0.76642185],
[0.772863 ],
[0.7789638 ],
[0.7841261 ],
[0.7879712 ],
[0.79085004],
[0.7932705 ],
[0.7956931 ],
[0.798347 ],
[0.8012447 ],
[0.8048235 ],
[0.8091127 ],
[0.8138179 ],
[0.8182611 ],
[0.82181305],
[0.82387507],
[0.82377154],
[0.8211821 ],
[0.8165666 ],
[0.8108821 ],
[0.80472356],
[0.79810655],
[0.7907862 ],
[0.7829917 ],
[0.7753162 ],
[0.7683371 ],
[0.7623032 ],
[0.75748414],
[0.7540863 ],
[0.7521809 ],
[0.7517648 ],
[0.7526571 ],
[0.7545817 ],
[0.7575614 ],
[0.7616058 ],
[0.76652974],
[0.7713855 ],
[0.77597415],
[0.78008264],
[0.7838417 ],
[0.78743577],
[0.7912203 ],
[0.7951408 ],
[0.79925555],
[0.80362374],
[0.80806065],
[0.81222755],
[0.81614935],
[0.82002264],
[0.82391626],
[0.8276264 ],
[0.8310506 ],
[0.8335951 ],
[0.8350968 ],
[0.8359456 ],
[0.8367332 ],
[0.837761 ],
[0.8391687 ],
[0.8412708 ],
[0.8443579 ],
[0.8484457 ],
[0.85294217],
[0.85729426],
[0.86122984],
[0.8648446 ],
[0.86789775],
[0.87003833],
[0.8708081 ],
[0.8696478 ],
[0.8664311 ],
[0.86133724],
[0.8543353 ],
[0.8455391 ],
[0.835825 ],
[0.82580036],
[0.8161387 ],
[0.80762917],
[0.8010116 ],
[0.7963791 ],
[0.7935011 ],
[0.7917103 ],
[0.78997433],
[0.7874468 ],
[0.78365636],
[0.7785314 ],
[0.7721208 ],
[0.76448864],
[0.7560913 ],
[0.7474955 ],
[0.73925704],
[0.7318706 ],
[0.7256574 ],
[0.72094446],
[0.7183368 ],
[0.7181373 ],
[0.72054404],
[0.7252632 ],
[0.7320195 ]], dtype=float32)
# Map predictions back from [0, 1] to rupee prices and compare with actuals.
predicted_price=sc.inverse_transform(y_pred)
for series,colour,lbl in ((y_test,'red','Actual Stock Price'),
                          (predicted_price,'green','Predicted Stock Price')):
    plt.plot(series,color=colour,label=lbl)
plt.title("TCS Stock Price Prediction")
plt.xlabel("Time")
plt.ylabel("Stock Price")
plt.legend()
plt.show()